require(knitr)
## Loading required package: knitr
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(readr)
##
## Attaching package: 'readr'
##
## The following objects are masked from 'package:scales':
##
## col_factor, col_numeric
#opts_knit$set(root.dir="/run/user//1001//gvfs/smb-share:server=biocldap,share=scratch/merrimanlab/murray/working_dir/extract/TajD/")
POP=""
TD=data.frame()
s=data.frame()
make_graph=function(TD,s){
ggplot(data = TD, aes(x=BIN_START, y=TajimaD), ylim=c(min(TD$TajimaD -0.5), max(TD$TajimaD + 0.5))) +
geom_point(shape = 16, colour= alpha("black", 1/5)) +
#geom_hex() +
facet_wrap(~CHROM, scales = "free_x") +
geom_hline(aes(yintercept= q1, colour ="quantile"), data=s,) +
geom_hline(aes(yintercept= q2, colour ="quantile"), data=s) +
geom_hline(aes(yintercept= mean, colour="mean"), data=s) +
scale_colour_manual("",breaks=c("mean","quantile"),values=c("green","red")) +
scale_x_continuous( xlab("Chromosome Position (Mbp)")) +
ylab("Tajima's D") +
ggtitle(paste0(POP," Tajima's D by Chromosome")) +
theme( plot.background= element_rect(colour="black",fill=NA), legend.position= c(0.75, 0.12)) +
theme_bw()
}
dir="/home/murraycadzow/smb_mounts/smb-share:server=biocldap,share=scratch/merrimanlab/murray/working_dir/extract/TajD/"
get_genes_in_region= function(regions, size){
library(RMySQL)
drv = dbDriver("MySQL")
ensembl_core = dbConnect(drv, user="anonymous", host="ensembldb.ensembl.org", dbname="homo_sapiens_core_75_37", password="")
for(i in 1:length(regions[,1])){
chr= as.data.frame(regions)[i,1]
pos1= as.data.frame(regions)[i,2]
pos2 = pos1+size
print(dbGetQuery(ensembl_core, paste0("select s.name, g.seq_region_start, g.seq_region_end, x.display_label, s.coord_system_id from gene g, seq_region s, xref x where s.name =", chr, " AND (", pos1 ," > g.seq_region_start AND ",pos2," < g.seq_region_end OR ",pos1," BETWEEN g.seq_region_start AND g.seq_region_end OR ",pos2," BETWEEN g.seq_region_start AND g.seq_region_end) AND g.display_xref_id = x.xref_id group by x.display_label AND s.seq_region_id = g.seq_region_id order by s.name *1, g.seq_region_start")))
}
}
for( POP in c("AXIOM","OMNI","CEU","CHB","CHS","GBR","YRI")){
print(POP)
TD=data.frame()
for( i in 1:22){
TD=rbind(TD,read.table(file = paste0(dir,POP,i,".taj_d"), header=TRUE))
}
s = TD %>% group_by(CHROM) %>% summarise(mean=mean(TajimaD), sd=sd(TajimaD), min=min(TajimaD), max=max(TajimaD), q1 = quantile(TajimaD, 0.01), q2 = quantile(TajimaD, 0.99))
print(as.data.frame(round(s, digits=2)))
plot(make_graph(TD,s))
top = TD %>% arrange(TajimaD) %>% head(n=10)
#filter(min(TajimaD) == TajimaD | max(TajimaD) == TajimaD)
print(as.data.frame(top))
get_genes_in_region(as.data.frame(top), size=30000)
}
## [1] "AXIOM"
## CHROM mean sd min max q1 q2
## 1 1 1.06 1.22 -2.41 5.26 -1.73 3.86
## 2 2 1.14 1.30 -2.58 5.17 -1.84 3.92
## 3 3 1.05 1.25 -2.44 5.01 -1.80 3.81
## 4 4 1.20 1.28 -2.41 5.13 -1.81 4.03
## 5 5 1.22 1.23 -2.52 4.91 -1.64 3.90
## 6 6 1.12 1.25 -2.35 5.01 -1.73 3.89
## 7 7 1.15 1.29 -2.38 5.10 -1.85 3.87
## 8 8 1.24 1.29 -2.47 4.97 -1.75 4.08
## 9 9 1.02 1.22 -2.39 5.04 -1.66 3.93
## 10 10 1.13 1.24 -2.51 4.81 -1.76 3.88
## 11 11 1.20 1.29 -2.45 4.79 -1.81 3.87
## 12 12 1.10 1.23 -2.58 5.11 -1.88 3.79
## 13 13 1.27 1.24 -2.37 4.69 -1.63 3.97
## 14 14 1.22 1.24 -2.48 4.87 -1.79 3.99
## 15 15 1.17 1.14 -2.18 5.08 -1.56 3.56
## 16 16 0.94 1.21 -2.24 4.71 -1.75 3.65
## 17 17 1.06 1.24 -2.40 4.54 -1.88 3.86
## 18 18 1.14 1.24 -2.51 4.73 -1.80 3.79
## 19 19 1.04 1.15 -2.43 4.48 -1.53 3.58
## 20 20 1.13 1.20 -2.41 4.83 -1.66 3.91
## 21 21 1.22 1.19 -1.97 4.60 -1.69 3.85
## 22 22 1.02 1.14 -2.27 4.50 -1.48 3.71
## CHROM BIN_START N_SNPS TajimaD
## 1 12 79140000 71 -2.57685
## 2 2 237420000 62 -2.57669
## 3 2 122340000 59 -2.56972
## 4 5 97110000 196 -2.52491
## 5 18 30660000 48 -2.51456
## 6 10 22560000 66 -2.50805
## 7 2 13560000 68 -2.50327
## 8 2 122160000 38 -2.49352
## 9 2 22230000 39 -2.48278
## 10 14 68490000 35 -2.47674
## Loading required package: DBI
## name seq_region_start seq_region_end display_label coord_system_id
## 1 12 78493824 79191463 RNF219-AS1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 237205505 237997288 RYR2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 122216466 122349367 PPAPDC1A 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 5 97094758 97123230 RP11-307E17.8 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 18 30565801 30660526 LINC00189 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 10 22513451 22567646 RP11-958F21.1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 13217497 13652754 LDLRAD4 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 122026130 122293579 RP11-820L6.1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 22208146 22242162 RP11-449D8.1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 14 68297986 68668670 GNG12-AS1 2
## [1] "OMNI"
## CHROM mean sd min max q1 q2
## 1 1 1.18 1.31 -2.58 5.38 -1.79 4.15
## 2 2 1.28 1.40 -2.57 5.05 -1.97 4.14
## 3 3 1.16 1.36 -2.49 5.43 -1.95 4.02
## 4 4 1.31 1.37 -2.47 5.14 -2.02 4.20
## 5 5 1.38 1.33 -2.46 5.78 -1.82 4.14
## 6 6 1.37 1.34 -2.55 5.52 -1.92 4.12
## 7 7 1.38 1.35 -2.65 5.37 -1.94 4.15
## 8 8 1.41 1.35 -2.52 5.14 -1.91 4.12
## 9 9 1.15 1.30 -2.30 5.33 -1.69 4.22
## 10 10 1.29 1.34 -2.45 5.10 -1.80 4.18
## 11 11 1.33 1.32 -2.49 5.63 -1.94 4.05
## 12 12 1.25 1.31 -2.49 5.33 -1.90 4.05
## 13 13 1.35 1.26 -2.43 5.18 -1.56 4.04
## 14 14 1.35 1.30 -2.46 5.13 -1.82 3.99
## 15 15 1.31 1.27 -2.36 4.99 -1.71 4.01
## 16 16 1.04 1.33 -2.62 4.95 -1.88 3.88
## 17 17 1.18 1.29 -2.61 4.96 -1.93 3.93
## 18 18 1.35 1.30 -2.37 5.10 -1.81 4.07
## 19 19 1.33 1.25 -2.21 4.88 -1.55 3.92
## 20 20 1.31 1.28 -2.37 5.10 -1.72 4.01
## 21 21 1.33 1.33 -2.53 5.51 -1.68 4.48
## 22 22 1.19 1.19 -2.46 4.67 -1.68 3.70
## CHROM BIN_START N_SNPS TajimaD
## 1 7 137220000 115 -2.65073
## 2 16 2700000 99 -2.61632
## 3 17 58530000 105 -2.61107
## 4 1 247980000 121 -2.57726
## 5 2 68790000 67 -2.57055
## 6 6 71850000 134 -2.55347
## 7 1 247950000 84 -2.54450
## 8 6 69870000 56 -2.53133
## 9 21 23370000 129 -2.52540
## 10 8 127110000 106 -2.52308
## name seq_region_start seq_region_end display_label coord_system_id
## 1 7 137150022 137225025 RP11-381K20.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 16 2673524 2740753 EBF4 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 17 58297400 58569959 RP11-325K19.1 2
## [1] name seq_region_start seq_region_end display_label
## [5] coord_system_id
## <0 rows> (or 0-length row.names)
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 68741234 68809906 PGM5P1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 6 71878626 71907790 RP11-669I1.1 2
## [1] name seq_region_start seq_region_end display_label
## [5] coord_system_id
## <0 rows> (or 0-length row.names)
## name seq_region_start seq_region_end display_label coord_system_id
## 1 6 69891208 69901481 AC125634.1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 21 23381263 23470778 AP000472.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 8 127019885 127115586 NEK6 2
## [1] "CEU"
## CHROM mean sd min max q1 q2
## 1 1 0.86 1.10 -2.65 5.07 -1.74 3.41
## 2 2 0.93 1.10 -2.58 4.61 -1.66 3.33
## 3 3 1.00 1.11 -2.48 5.41 -1.69 3.46
## 4 4 1.03 1.14 -2.46 4.96 -1.70 3.46
## 5 5 1.00 1.08 -2.27 4.35 -1.53 3.38
## 6 6 1.03 1.08 -2.49 4.50 -1.61 3.50
## 7 7 1.01 1.10 -2.34 4.67 -1.70 3.35
## 8 8 0.93 1.14 -2.46 5.19 -1.82 3.44
## 9 9 0.80 1.04 -2.38 4.92 -1.49 3.19
## 10 10 0.98 1.10 -2.25 5.08 -1.64 3.49
## 11 11 1.05 1.08 -2.28 4.75 -1.63 3.47
## 12 12 0.95 1.11 -2.29 4.94 -1.68 3.51
## 13 13 1.02 1.07 -2.46 5.06 -1.50 3.44
## 14 14 0.94 1.10 -2.45 5.35 -1.69 3.38
## 15 15 0.89 1.09 -2.42 4.20 -1.82 3.24
## 16 16 0.76 1.11 -2.01 4.07 -1.66 3.16
## 17 17 1.01 1.08 -2.29 4.34 -1.67 3.36
## 18 18 0.97 1.08 -2.24 4.30 -1.67 3.33
## 19 19 0.94 1.13 -2.55 4.88 -1.89 3.40
## 20 20 0.96 1.06 -2.58 4.11 -1.49 3.47
## 21 21 1.11 1.09 -2.15 5.13 -1.34 3.53
## 22 22 0.98 1.03 -2.15 4.11 -1.27 3.29
## CHROM BIN_START N_SNPS TajimaD
## 1 1 188850000 196 -2.64798
## 2 20 41520000 80 -2.58306
## 3 2 21780000 91 -2.58216
## 4 2 182580000 188 -2.56844
## 5 19 32070000 94 -2.55411
## 6 6 73830000 95 -2.49485
## 7 3 26280000 101 -2.47956
## 8 8 35610000 88 -2.46394
## 9 13 104850000 82 -2.45996
## 10 4 13320000 114 -2.45799
## name seq_region_start seq_region_end display_label coord_system_id
## 1 1 188577543 189152418 LINC01090 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 20 41516046 41523018 FAM74A6 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 21665003 22214734 CASC15 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 182511288 182639423 ATP11B 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 19 32028898 32145082 NRG1-IT2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 6 73859385 73862680 RP11-531A24.3 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 3 26212864 26430059 AP000235.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 8 35445892 35732332 AP000320.7 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 13 104864962 104893895 CASP5 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 4 13217497 13652754 LDLRAD4 2
## [1] "CHB"
## CHROM mean sd min max q1 q2
## 1 1 1.41 1.30 -2.25 5.41 -1.46 4.19
## 2 2 1.51 1.33 -2.45 5.28 -1.68 4.24
## 3 3 1.49 1.30 -2.30 5.62 -1.58 4.16
## 4 4 1.67 1.29 -2.47 5.50 -1.54 4.40
## 5 5 1.61 1.28 -2.36 5.44 -1.45 4.30
## 6 6 1.61 1.22 -2.36 5.29 -1.34 4.29
## 7 7 1.63 1.27 -2.30 5.13 -1.50 4.22
## 8 8 1.68 1.28 -2.40 5.14 -1.57 4.29
## 9 9 1.31 1.25 -2.23 5.02 -1.32 3.96
## 10 10 1.57 1.25 -2.35 5.65 -1.50 4.20
## 11 11 1.72 1.27 -2.24 5.09 -1.45 4.27
## 12 12 1.57 1.28 -2.21 5.57 -1.57 4.20
## 13 13 1.70 1.22 -2.08 5.20 -1.34 4.29
## 14 14 1.67 1.21 -2.39 5.14 -1.31 4.30
## 15 15 1.60 1.22 -2.00 5.41 -1.38 4.16
## 16 16 1.31 1.35 -2.35 4.83 -1.64 4.06
## 17 17 1.56 1.27 -2.23 5.00 -1.56 4.20
## 18 18 1.65 1.25 -2.08 5.30 -1.21 4.13
## 19 19 1.59 1.29 -2.37 5.17 -1.52 4.20
## 20 20 1.61 1.28 -2.09 5.48 -1.51 4.15
## 21 21 1.73 1.22 -1.92 4.97 -1.01 4.37
## 22 22 1.51 1.24 -2.14 4.55 -1.40 3.99
## CHROM BIN_START N_SNPS TajimaD
## 1 4 179820000 209 -2.47443
## 2 2 195030000 148 -2.44921
## 3 8 93030000 53 -2.39828
## 4 14 105780000 89 -2.38921
## 5 19 32070000 76 -2.37235
## 6 2 108960000 76 -2.36440
## 7 6 105870000 40 -2.35785
## 8 5 117510000 81 -2.35769
## 9 16 47820000 95 -2.35057
## 10 10 107220000 62 -2.34669
## name seq_region_start seq_region_end display_label coord_system_id
## 1 4 179694484 179914813 CCDC141 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 194995465 195163807 ACAP2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 8 93014884 93044347 C15orf32 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 14 105575031 105887950 RP11-556I14.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 19 32028898 32145082 NRG1-IT2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 108926358 108963917 SLC25A24P2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 6 105575031 105887950 RP11-556I14.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 5 116853124 117708503 ATRNL1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 16 47311134 48143999 MDGA2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 10 106962756 107242652 TBCK 2
## [1] "CHS"
## CHROM mean sd min max q1 q2
## 1 1 1.41 1.30 -2.25 5.44 -1.46 4.22
## 2 2 1.52 1.33 -2.41 5.17 -1.62 4.27
## 3 3 1.49 1.30 -2.41 5.95 -1.62 4.22
## 4 4 1.69 1.31 -2.14 5.67 -1.55 4.46
## 5 5 1.59 1.30 -2.30 5.39 -1.45 4.35
## 6 6 1.63 1.26 -2.32 5.49 -1.37 4.31
## 7 7 1.64 1.27 -2.19 5.38 -1.52 4.26
## 8 8 1.68 1.29 -2.30 5.42 -1.54 4.32
## 9 9 1.33 1.26 -2.44 4.72 -1.24 4.02
## 10 10 1.61 1.25 -1.99 5.67 -1.46 4.24
## 11 11 1.73 1.28 -2.13 5.13 -1.53 4.30
## 12 12 1.57 1.30 -2.14 5.10 -1.55 4.25
## 13 13 1.73 1.23 -2.08 5.43 -1.18 4.40
## 14 14 1.72 1.19 -1.82 5.36 -1.14 4.28
## 15 15 1.61 1.22 -2.16 5.49 -1.28 4.12
## 16 16 1.35 1.35 -2.23 5.18 -1.46 4.04
## 17 17 1.55 1.29 -2.39 5.31 -1.49 4.24
## 18 18 1.69 1.26 -2.11 5.34 -1.38 4.24
## 19 19 1.61 1.29 -2.32 5.00 -1.43 4.31
## 20 20 1.63 1.24 -2.36 5.01 -1.27 4.20
## 21 21 1.77 1.22 -1.76 4.97 -0.94 4.48
## 22 22 1.56 1.20 -2.38 4.68 -1.30 4.01
## CHROM BIN_START N_SNPS TajimaD
## 1 9 92310000 121 -2.43656
## 2 2 195030000 141 -2.41266
## 3 3 17370000 88 -2.41213
## 4 17 58830000 185 -2.39427
## 5 17 62970000 97 -2.38774
## 6 22 46770000 65 -2.37656
## 7 20 24870000 117 -2.36448
## 8 6 105900000 51 -2.32052
## 9 19 32070000 51 -2.31985
## 10 6 105870000 37 -2.30978
## name seq_region_start seq_region_end display_label coord_system_id
## 1 9 92339633 92400146 CASC6 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 194995465 195163807 ACAP2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 3 17354597 17428082 SLC7A2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 17 58662895 59102516 RP5-1043L13.1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 17 62931248 62997124 SLC22A25 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 22 46570039 46987717 DYM 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 20 24863206 24924358 UPB1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 6 105902805 106087316 RP11-341A22.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 19 32028898 32145082 NRG1-IT2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 6 105575031 105887950 RP11-556I14.2 2
## [1] "GBR"
## CHROM mean sd min max q1 q2
## 1 1 0.89 1.10 -2.49 4.87 -1.70 3.37
## 2 2 0.95 1.11 -2.57 4.49 -1.66 3.36
## 3 3 1.02 1.11 -2.56 5.26 -1.68 3.49
## 4 4 1.07 1.16 -2.45 4.90 -1.65 3.58
## 5 5 1.03 1.07 -2.43 4.92 -1.47 3.34
## 6 6 1.07 1.09 -2.42 4.85 -1.61 3.53
## 7 7 1.05 1.11 -2.55 4.81 -1.74 3.52
## 8 8 0.96 1.14 -2.46 5.01 -1.87 3.42
## 9 9 0.82 1.04 -2.38 4.53 -1.51 3.23
## 10 10 1.01 1.07 -2.37 4.90 -1.45 3.47
## 11 11 1.08 1.06 -2.35 5.10 -1.70 3.41
## 12 12 0.98 1.14 -2.38 4.79 -1.68 3.59
## 13 13 1.10 1.07 -2.19 4.65 -1.43 3.45
## 14 14 0.95 1.11 -2.49 4.08 -1.74 3.34
## 15 15 0.99 1.09 -2.31 4.47 -1.70 3.38
## 16 16 0.77 1.14 -2.29 4.07 -1.79 3.21
## 17 17 1.05 1.10 -2.23 4.47 -1.70 3.39
## 18 18 1.02 1.08 -2.09 4.34 -1.58 3.38
## 19 19 0.97 1.12 -2.36 4.51 -1.90 3.40
## 20 20 0.97 1.05 -2.55 4.52 -1.64 3.33
## 21 21 1.11 1.10 -2.38 4.76 -1.48 3.58
## 22 22 1.06 1.03 -2.21 4.31 -1.13 3.29
## CHROM BIN_START N_SNPS TajimaD
## 1 2 182580000 186 -2.56737
## 2 3 26280000 99 -2.56121
## 3 20 41520000 76 -2.55033
## 4 7 137220000 169 -2.54973
## 5 2 21780000 88 -2.54636
## 6 1 35640000 43 -2.49439
## 7 14 98070000 85 -2.49139
## 8 8 23820000 62 -2.45889
## 9 4 91320000 191 -2.45368
## 10 1 28020000 50 -2.43462
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 182511288 182639423 ATP11B 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 3 26212864 26430059 AP000235.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 20 41516046 41523018 FAM74A6 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 7 137150022 137225025 RP11-381K20.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 21665003 22214734 CASC15 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 1 35445892 35732332 AP000320.7 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 14 98096514 98103932 RP11-461F11.3 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 8 23817659 23821323 TATDN2P3 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 4 91260558 91358859 BLM 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 1 28000021 28344504 OCA2 2
## [1] "YRI"
## CHROM mean sd min max q1 q2
## 1 1 0.42 0.71 -1.80 4.06 -1.12 2.35
## 2 2 0.46 0.70 -1.91 3.92 -1.07 2.26
## 3 3 0.50 0.70 -1.75 4.32 -1.02 2.44
## 4 4 0.52 0.75 -1.90 3.88 -1.10 2.56
## 5 5 0.45 0.71 -1.90 4.03 -1.08 2.33
## 6 6 0.51 0.76 -1.70 4.12 -1.09 2.62
## 7 7 0.48 0.74 -1.62 4.34 -1.12 2.41
## 8 8 0.46 0.70 -1.81 3.41 -1.01 2.25
## 9 9 0.41 0.68 -1.77 3.42 -1.05 2.31
## 10 10 0.50 0.72 -1.87 3.64 -1.04 2.51
## 11 11 0.48 0.71 -1.51 3.65 -1.05 2.44
## 12 12 0.47 0.72 -1.81 3.91 -1.09 2.44
## 13 13 0.52 0.74 -1.83 4.12 -1.03 2.47
## 14 14 0.49 0.70 -1.41 4.30 -0.98 2.39
## 15 15 0.48 0.68 -1.89 3.76 -1.01 2.19
## 16 16 0.39 0.67 -1.70 3.17 -1.10 2.10
## 17 17 0.45 0.69 -1.79 3.02 -1.08 2.26
## 18 18 0.49 0.66 -1.71 3.01 -0.92 2.09
## 19 19 0.55 0.72 -1.85 3.75 -1.15 2.39
## 20 20 0.50 0.66 -1.61 3.12 -1.07 2.23
## 21 21 0.58 0.73 -1.46 4.87 -0.82 2.73
## 22 22 0.53 0.70 -1.31 3.37 -1.04 2.25
## CHROM BIN_START N_SNPS TajimaD
## 1 2 84390000 162 -1.90543
## 2 4 74910000 186 -1.90023
## 3 5 83820000 129 -1.89752
## 4 15 24480000 536 -1.89199
## 5 10 134430000 128 -1.87367
## 6 19 29160000 256 -1.85046
## 7 13 53010000 152 -1.82883
## 8 12 74250000 204 -1.81119
## 9 8 99570000 76 -1.80970
## 10 1 175230000 205 -1.79954
## name seq_region_start seq_region_end display_label coord_system_id
## 1 2 84304628 84391815 RP11-154D17.1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 4 74920346 74958126 RP11-63P12.6 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 5 83847934 84108197 RP11-382A20.4 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 15 24437813 24503528 AP001255.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 10 134316643 134979309 EPHB1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 19 29143289 29163375 AL935156.1 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 13 52949815 53137158 AC010967.2 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 12 74233341 74280319 RP11-505P4.7 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 8 99497378 99646030 RP11-654A16.3 2
## name seq_region_start seq_region_end display_label coord_system_id
## 1 1 174156363 175523428 NAALADL2 2
Distance matrix is created from taking 1000 most negative TD results and finding the number of windows that overlap between each population
library("RMySQL")
drv=dbDriver(drvName = "MySQL")
db=dbConnect(drv, host="127.0.0.1", user="murray", db="selection")
axiomTD= dbGetQuery(db, "select * from tajimasd where Population = 'AXIOM' and TajimasD < 0 order by TajimasD limit 1000;")
omniTD=dbGetQuery(db, "select * from tajimasd where Population = 'OMNI' and TajimasD < 0 order by TajimasD limit 1000;")
ceuTD=dbGetQuery(db, "select * from tajimasd where Population = 'CEU' and TajimasD < 0 order by TajimasD limit 1000;")
chbTD=dbGetQuery(db, "select * from tajimasd where Population = 'CHB' and TajimasD < 0 order by TajimasD limit 1000;")
chsTD=dbGetQuery(db, "select * from tajimasd where Population = 'CHS' and TajimasD < 0 order by TajimasD limit 1000;")
gbrTD=dbGetQuery(db, "select * from tajimasd where Population = 'GBR' and TajimasD < 0 order by TajimasD limit 1000;")
yriTD=dbGetQuery(db, "select * from tajimasd where Population = 'YRI' and TajimasD < 0 order by TajimasD limit 1000;")
overlap = function(pop1, pop2){
return(length(merge(pop1, pop2, by = c("chrom", "chrom_start"))[,1]))
}
axiom_omniTD = overlap(axiomTD,omniTD)
axiom_ceuTD = overlap(axiomTD,ceuTD)
axiom_chbTD =overlap(axiomTD,chbTD)
axiom_chsTD = overlap(axiomTD,chsTD)
axiom_gbrTD = overlap(axiomTD,gbrTD)
axiom_yriTD = overlap(axiomTD,yriTD)
omni_ceuTD = overlap(omniTD,ceuTD)
omni_chbTD = overlap(omniTD,chbTD)
omni_chsTD = overlap(omniTD,chsTD)
omni_gbrTD = overlap(omniTD,gbrTD)
omni_yriTD = overlap(omniTD,yriTD)
ceu_chbTD = overlap(ceuTD,chbTD)
ceu_chsTD = overlap(ceuTD,chsTD)
ceu_gbrTD = overlap(ceuTD,gbrTD)
ceu_yriTD = overlap(ceuTD,yriTD)
chb_chsTD = overlap(chbTD, chsTD)
chb_gbrTD = overlap(chbTD, gbrTD)
chb_yriTD = overlap(chbTD, yriTD)
chs_gbrTD = overlap(chsTD, gbrTD)
chs_yriTD = overlap(chsTD, yriTD)
gbr_yriTD = overlap(gbrTD,yriTD)
# make distance matrix
dist = matrix(nrow = 7, ncol=7)
colnames(dist) = c('axiom','omni','ceu','chb','chs','gbr','yri')
rownames(dist) = c('axiom','omni','ceu','chb','chs','gbr','yri')
dist[,1]= c(0,axiom_omniTD,axiom_ceuTD,axiom_chbTD,axiom_chsTD,axiom_gbrTD,axiom_yriTD)
dist[,2]= c(axiom_omniTD,0,omni_ceuTD,omni_chbTD,omni_chsTD,omni_gbrTD,omni_yriTD)
dist[,3]= c(axiom_ceuTD,omni_ceuTD,0,ceu_chbTD,ceu_chsTD,ceu_gbrTD,ceu_yriTD)
dist[,4]= c(axiom_ceuTD,omni_ceuTD,ceu_chbTD,0,chb_chsTD,chb_gbrTD,chb_yriTD)
dist[,5]= c(axiom_ceuTD,omni_ceuTD,ceu_chbTD,chb_chsTD,0,chs_gbrTD,chs_yriTD)
dist[,6]= c(axiom_ceuTD,omni_ceuTD,ceu_chbTD,chb_chsTD,chs_gbrTD,0,gbr_yriTD)
dist[1,]=t(dist[,1])
dist[2,]=t(dist[,2])
dist[3,]=t(dist[,3])
dist[4,]=t(dist[,4])
dist[5,]=t(dist[,5])
dist[6,]=t(dist[,6])
dist2 = 1/dist
dist2[1,1]=0;dist2[2,2]=0;dist2[3,3]=0;dist2[4,4]=0;dist2[5,5]=0;dist2[6,6]=0;dist2[7,7]=0
plot(hclust(dist(dist2)))
dist
## axiom omni ceu chb chs gbr yri
## axiom 0 296 133 181 200 129 37
## omni 296 0 116 237 235 116 30
## ceu 133 116 0 99 109 577 52
## chb 181 237 99 0 518 105 31
## chs 200 235 109 518 0 115 39
## gbr 129 116 577 105 115 0 36
## yri 37 30 52 31 39 36 NA
#intersection of axiom and omni TAJIMAs D
library(VennDiagram)
## Loading required package: grid
grid.newpage()
draw.pairwise.venn(length(axiomTD[,1]),length(omniTD[,1]),length(merge(axiomTD, omniTD, by = c("chrom", "chrom_start"))[,1]), c("NZ Maori", "Samoan"), col = c("red","blue"), fill = c("red","blue"))
## (polygon[GRID.polygon.6966], polygon[GRID.polygon.6967], polygon[GRID.polygon.6968], polygon[GRID.polygon.6969], text[GRID.text.6970], text[GRID.text.6971], text[GRID.text.6972], text[GRID.text.6973], text[GRID.text.6974])